---
title: "Busara Dashboard"
author: "Rémi Thériault"
output:
flexdashboard::flex_dashboard:
orientation: columns
vertical_layout: fill
social: menu
source_code: embed
# theme: lumen
storyboard: false
params:
API_TOKEN_PUBMED: API_TOKEN_PUBMED
---
```{r setup, include=FALSE}
library(flexdashboard)
query_pubmed <- TRUE
```
```{r packages}
# Load packages
library(extrafont)
library(easyPubMed)
library(dplyr)
library(purrr)
library(stringr)
library(stringi)
library(DT)
library(fuzzyjoin)
library(countrycode)
library(Ecfun)
library(tidyr)
library(waffle)
library(plotly)
library(ggplot2)
library(ggflags)
library(knitr)
library(lubridate)
library(xts)
library(tibble)
library(dygraphs)
library(rempsyc)
library(RColorBrewer)
```
```{r API_TOKEN_PUBMED, eval=query_pubmed}
API_TOKEN_PUBMED <- keyring::key_get("pubmed", "rempsyc")
# API_TOKEN_PUBMED <- Sys.getenv("API_TOKEN_PUBMED")
# API_TOKEN_PUBMED <- params$API_TOKEN_PUBMED
if (API_TOKEN_PUBMED == "") stop("API_TOKEN_PUBMED is an empty string. Terminating workflow.")
if (nchar(API_TOKEN_PUBMED) != 36) stop("API_TOKEN_PUBMED is not 36 characters-long. Terminating workflow.")
```
```{r batch_pubmed_download, results='hide', eval=query_pubmed}
# Download data
d.fls <- batch_pubmed_download(
pubmed_query_string = paste(
"Developmental Psychology [Journal]",
"OR Journal of Personality and Social Psychology [Journal]",
"OR Journal of Abnormal Psychology [Journal]",
"OR Journal of Family Psychology [Journal]",
"OR Health Psychology [Journal]",
"OR Journal of Educational Psychology [Journal]",
"AND ('1940/01/01'[Date - Publication] : '1949/12/31'[Date - Publication])"
),
dest_file_prefix = "data/easyPubMed_data_",
api_key = API_TOKEN_PUBMED,
batch_size = 5000)
```
```{r all_articles_to_df, eval=query_pubmed}
# Convert XLM data to a data frame of first authors
# articles.df <- table_articles_byAuth(d.fls, included_authors = "first")
all_articles_to_df <- function(d.fls){
y <- lapply(seq_along(d.fls), function(x) {
list.articles <- articles_to_list(d.fls[x])
list.articles.df <- lapply(list.articles, article_to_df)
articles.df <- do.call(rbind, list.articles.df)
})
do.call(rbind, y)
}
articles.df <- all_articles_to_df(d.fls)
articles.df <- articles.df %>%
filter(!duplicated(pmid))
```
```{r address_split, message=FALSE, warning=FALSE, eval=query_pubmed}
# Split address in university and department
addr.split <- str_split(articles.df$address, ",")
split_address <- function(addr.split, string) {
ind1 <- map(addr.split, ~which(grepl(string, .x)))
ind1 <- imap(addr.split, \(x, idx) pluck(ind1[[idx]])[1])
ind1 <- imap(addr.split, \(x, idx) pluck(x, ind1[[idx]]))
ind1 <- trimws(as.character(ind1))
ind1 <- replace(ind1, ind1 == "NULL", NA)
}
string.dep <- "Department|Faculty|Center|School|Unit|Institute|Centre|Division|Unidad"
dep <- split_address(addr.split, string.dep)
string.uni <- "University"
uni <- split_address(addr.split, string.uni)
string.uni2 <- "University|College|School|Institute|Center|Centre|CEMIC, CONICET|CNRS|INSEAD"
uni2 <- split_address(addr.split, string.uni2)
uni3 <- ifelse(!is.na(uni), uni, uni2)
articles.df <- articles.df %>%
mutate(department = dep,
university = uni3)
articles.df <- articles.df %>%
select(journal, year, university, department, address, lastname, firstname,
month, day, jabbrv, title, doi, pmid, abstract)
```
```{r correct_universities, eval=query_pubmed}
# Correct a few university names manually
articles.df <- articles.df %>%
mutate(university = case_when(
university == "Stony Brook University" ~ "University of Colorado at Colorado Springs",
university == "Technion-Israel Institute of Technology" ~ "Technion - Israel Institute of Technology",
university == "University of Montreal" ~ "Université de Montréal",
university == "Philipps-University of Marburg" ~ "Phillips-Universität Marburg",
university == "University of Wisconsin-Madison" ~ "University of Wisconsin - Madison",
TRUE ~ university
))
```
```{r countries_data, eval=query_pubmed}
# Download university + country data
countries <- read.csv(
"https://raw.githubusercontent.com/endSly/world-universities-csv/master/world-universities.csv",
header = FALSE)
names(countries) <- c("country_code", "university", "website")
countries <- countries[1:2]
# Correct/add a few university countries manually
countries <- countries %>%
mutate(
country_code = replace(country_code,
university == "University of the Netherlands Antilles, Curacao", "CW"),
country_code = replace(country_code,
university == "University of Sint Eustatius School of Medicine", "SX"),
country_code = replace(country_code,
university == "St.James's School of Medicine, Bonaire" |
university == "American University of the Caribbean, Sint Maarten" |
university == "International University School of Medicine (IUSOM)", "BQ")) %>%
add_row(country_code = "US", university = "University of Colorado") %>%
add_row(country_code = "US", university = "Stony Brook University") %>%
add_row(country_code = "GB", university = "York St John University") %>%
add_row(country_code = "DE", university = "Max Planck Institute for Human Development") %>%
add_row(country_code = "AR", university = "CEMIC, CONICET") %>%
add_row(country_code = "VN", university = "SWPS University of Social Sciences and Humanities") %>%
add_row(country_code = "SG", university = "Yale-NUS College") %>%
add_row(country_code = "US", university = "Rutgers Business School-Newark and New Brunswick") %>%
add_row(country_code = "BR", university = "Institute D'Or for Research and Teaching") %>%
add_row(country_code = "GB", university = "Moray House School of Education and Sport") %>%
add_row(country_code = "AU", university = "Institute for Positive Psychology and Education") %>%
add_row(country_code = "FR", university = "CNRS") %>%
add_row(country_code = "US", university = "Columbia Business School") %>%
add_row(country_code = "US", university = "Office of Population Research") %>%
add_row(country_code = "US", university = "School of Family Life") %>%
add_row(country_code = "CH", university = "Jacobs Center for Productive Youth Development") %>%
add_row(country_code = "KR", university = "SKK Graduate School of Business") %>%
add_row(country_code = "SG", university = "Lee Kong Chian School of Business") %>%
add_row(country_code = "US", university = "Tepper School of Business") %>%
add_row(country_code = "US", university = "Stephen M. Ross School of Business") %>%
add_row(country_code = "US", university = "Fuqua School of Business") %>%
add_row(country_code = "US", university = "Jones Graduate School of Business") %>%
add_row(country_code = "US", university = "Questrom School of Business") %>%
add_row(country_code = "US", university = "Booth School of Business") %>%
add_row(country_code = "FR", university = "INSEAD") %>%
add_row(country_code = "BE", university = "University of Liege") %>%
add_row(country_code = "KR", university = "Sungkyunkwan University") %>%
add_row(country_code = "DE", university = "University of Tubingen") %>%
add_row(country_code = "AU", university = "Melbourne School of Psychological Sciences") %>%
add_row(country_code = "AT", university = "University of Vienna") %>%
add_row(country_code = "NL", university = "Vrije Universiteit Amsterdam") %>%
add_row(country_code = "CA", university = "Montreal Behavioural Medicine Centre (MBMC)") %>%
add_row(country_code = "GB", university = "Manchester Centre for Health Psychology") %>%
add_row(country_code = "US", university = "Annenberg School for Communication and Journalism") %>%
add_row(country_code = "US", university =
"National Center for Posttraumatic Stress Disorder at VA Boston Healthcare System") %>%
add_row(country_code = "US", university = "Cincinnati Children's Hospital Medical Center") %>%
add_row(country_code = "US", university = "University of Wisconsin")
# Custom function for two-sided vlookup
partial_vlookup <- function(pattern, lookup_vector) {
out <- map_chr(pattern, \(x) {
out <- grep(x, lookup_vector, value = TRUE, fixed = TRUE)[1]
if (is.na(out)) {
out <- rgrep(lookup_vector, x, value = TRUE, fixed = TRUE)[1]
}
out
})
out
}
```
```{r partial_vlookup, eval=query_pubmed}
# Match universities and countries
articles.df2 <- articles.df %>%
mutate(
university_old = university,
university = partial_vlookup(university, countries$university),
university = ifelse(is.na(university), partial_vlookup(address, countries$university), university),
.after = university)
articles.df3 <- articles.df2 %>%
left_join(countries, by = "university", multiple = "first") %>%
relocate(country_code, .after = year)
```
```{r country_code_conversion, eval=query_pubmed}
# Get full name country, continent, and region
articles.df4 <- articles.df3 %>%
mutate(doi = paste0("https://doi.org/", doi),
country = countrycode(country_code, "genc2c", "country.name"),
region = countrycode(country_code, "genc2c", "un.regionsub.name"),
continent = countrycode(country_code, "genc2c", "continent"),
continent = case_when(continent == "Americas" ~ region,
TRUE ~ continent),
.after = country_code) %>%
mutate(date = paste(year, month, day, sep = "-"),
date = as_date(date)) %>%
ungroup() %>%
mutate()
```
```{r save file}
saveRDS(articles.df4, "data/articles_1940_1949.rds")
```